## author:    A. Dür, Robert A. Huber, Gemma Mateo, Gabriele Spilker
## contact:   robert.huber@sbg.ac.at
## file name: nti_experiment_cleaning.R
## Context:   Project on NTI in PTAs
## started:   2020-03-10
## Summary:   Prepares and creates the dataframe from the conjoint experiment

# Load the cleaned survey data.
df <- readRDS("./data/survey_clean.rds")

# Attribute codes and profile codes (agreement a/b in round 1/2) that make up
# the names of the conjoint attribute columns, e.g. "ser_a1".
attr_codes <- c("ser", "env", "lab", "ipr")
profile_codes <- c("a1", "b1", "a2", "b2")

# All attribute/profile combinations, with the attribute code varying fastest.
profile_cols <- as.vector(outer(attr_codes, profile_codes, paste, sep = "_"))

# Outcome columns of round 1 and round 2.
outcome_cols <- c(
  "exp1", "exp1_favourably_A", "exp1_favourably_B",
  "exp2", "exp2_favourably_A", "exp2_favourably_B"
)

# Respondent-level identifiers and covariates.
id_cols <- c(
  "seed", "type", "gen_type", "non_EU_OECD_focus", "shr",
  "bus_sector", "ngo_type", "knowledge_intensive", "services_tradeable",
  "general_business", "focus_region1"
)

vars <- c(profile_cols, outcome_cols, id_cols)

# Keep only rows with a non-missing round-1 choice, restricted to `vars`.
answered_round1 <- !is.na(df$exp1)
df_exp <- df[answered_round1, vars]

# Build one data frame per displayed profile (round 1/2 x agreement A/B).
#
# data:       wide experiment data, one row per respondent.
# round_no:   experiment round (1 or 2); stored as-is in column `round`.
# side_label: "A" or "B"; stored in column `side`.
#
# Returns the attribute columns of the requested profile renamed to
# ser/env/lab/ipr, the round's choice column renamed to `choice`, the
# profile's favourability column renamed to `rating`, the id/covariate
# columns, plus `round`, `side` and a running `respondent` index.
profile_frame <- function(data, round_no, side_label) {
  attr_codes <- c("ser", "env", "lab", "ipr")
  suffix <- paste0("_", tolower(side_label), round_no)

  # new name = existing column name
  lookup <- c(
    setNames(paste0(attr_codes, suffix), attr_codes),
    choice = paste0("exp", round_no),
    rating = paste0("exp", round_no, "_favourably_", side_label)
  )

  id_cols <- c(
    "seed", "type", "gen_type", "non_EU_OECD_focus", "shr",
    "bus_sector", "ngo_type", "knowledge_intensive", "services_tradeable",
    "general_business", "focus_region1"
  )

  data %>%
    dplyr::select(dplyr::all_of(unname(lookup)), dplyr::all_of(id_cols)) %>%
    rename(dplyr::all_of(lookup)) %>%
    mutate(round = round_no,
           side = side_label,
           # seq_len() rather than 1:nrow(): the latter yields c(1, 0) for an
           # empty data frame and makes mutate() fail.
           respondent = seq_len(nrow(data)))
}

df_exp1A <- profile_frame(df_exp, 1, "A")
df_exp1B <- profile_frame(df_exp, 1, "B")
df_exp2A <- profile_frame(df_exp, 2, "A")
df_exp2B <- profile_frame(df_exp, 2, "B")

# Stack the four profile-level data frames into one long data frame.
df_exp <- bind_rows(df_exp1A, df_exp1B, df_exp2A, df_exp2B)

# Keep the raw answer in `choice_label`; `choice` becomes 1 when the row's own
# profile was the one picked (0 otherwise) and `choice_neither` flags the
# "Neither" answer.
df_exp <- df_exp %>%
  mutate(
    choice_label = choice,
    choice = if_else(
      (choice_label == "Agreement A" & side == "A") |
        (choice_label == "Agreement B" & side == "B"),
      1, 0
    ),
    choice_neither = if_else(choice_label == "Neither", 1, 0)
  )


# The attribute indicators are recoded into labelled factors so that the data
# can be processed with cregg, which is used to compute marginal means for
# the robustness checks.

# Map a 0/1 indicator to a two-level factor "<prefix>:No" / "<prefix>:Yes";
# any other value (including NA) becomes NA.
as_no_yes_factor <- function(indicator, prefix) {
  level_names <- paste0(prefix, c(":No", ":Yes"))
  recoded <- ifelse(indicator == 0, level_names[1],
                    ifelse(indicator == 1, level_names[2], NA))
  factor(recoded, levels = level_names)
}

df_exp$ser <- as_no_yes_factor(df_exp$ser, "Services")
df_exp$env <- as_no_yes_factor(df_exp$env, "Environmental")
df_exp$lab <- as_no_yes_factor(df_exp$lab, "Labour")
df_exp$ipr <- as_no_yes_factor(df_exp$ipr, "IPR")

# Remove every object in the current environment except `df` and `df_exp`.
# NOTE(review): this also deletes objects created by whatever script sourced
# this file; consider removing only this script's own temporaries.
rm(list = setdiff(ls(), c("df", "df_exp")))


# Derive the interest-group classification factors (ig, ig_export, ig_ser,
# ig_ipr, ig_env, ig_lab, know_ns).
# mutate() evaluates its arguments in order, so the recoded knowledge_intensive
# and ngo_type from the first two lines are what every later expression sees.
# ifelse() returns NA wherever its test is NA, so a missing input can yield NA
# instead of falling through to a later branch of a nested chain.
df_exp <- df_exp %>%
         # Missing knowledge_intensive becomes -1, so the == 1 / == 0 tests
         # further down evaluate to FALSE rather than NA for those rows.
  mutate(knowledge_intensive = ifelse(is.na(knowledge_intensive), -1, knowledge_intensive),
         # Missing ngo_type becomes the explicit category "Non-citizen group";
         # the column is character afterwards.
         ngo_type = ifelse(is.na(ngo_type), "Non-citizen group", as.character(ngo_type)),
         # ig: business groups with general_business == 0 and a focus region
         # other than "world" are split on shr (<= .45 import competing,
         # >= .55 export oriented); citizen groups and labour unions get their
         # own level; all remaining business groups are "Other business".
         ig = factor(ifelse(gen_type == "Business groups" & shr <= .45 & focus_region1 != "world" & general_business == 0, "Import competing business",
                            ifelse(gen_type == "Business groups" & shr >= .55 & focus_region1 != "world" & general_business == 0, "Export oriented business",
                                   ifelse(gen_type == "Citizen groups", "Citizen groups",
                                          ifelse(gen_type == "Labour union", "Labour unions", 
                                                 ifelse(gen_type == "Business groups", "Other business", NA))))),
                     levels = c("Other business", "Export oriented business", "Import competing business","Citizen groups", "Labour unions")),
         # ig_export: export-oriented business split by non_EU_OECD_focus
         # ("No" -> Northern, "Yes" -> Southern); NA for every other row.
         ig_export = factor(ifelse(ig == "Export oriented business" & non_EU_OECD_focus == "No", "Northern export oriented business",
                                   ifelse(ig == "Export oriented business" & non_EU_OECD_focus == "Yes", "Southern export oriented business", NA)),
                            levels = c("Southern export oriented business", "Northern export oriented business")),
         # ig_ser: services_tradeable == 1 -> "Service business"; otherwise
         # "Other business" when bus_sector is present and NA when it is missing.
         ig_ser = factor(ifelse(services_tradeable == 1, "Service business",
                                ifelse(is.na(bus_sector), NA, "Other business")),
                         levels = c("Other business", "Service business")),
         # ig_ipr: health NGOs, then any other non-missing ngo_type, then
         # business split on knowledge_intensive (1 / 0); rows with the -1
         # placeholder end up NA.
         ig_ipr = factor(ifelse(ngo_type == "Health NGO", "Health policy citizen group",
                                ifelse(ngo_type != "Non-citizen group", "Other citizen group",
                                       ifelse(knowledge_intensive == 1, "Knowledge intensive business", 
                                              ifelse(knowledge_intensive == 0, "Other business", NA)))),
                         levels = c("Other business", "Other citizen group", "Knowledge intensive business", "Health policy citizen group")),
         # ig_env: environmental NGOs vs. all other citizen groups; NA for
         # rows whose gen_type is not "Citizen groups".
         ig_env = factor(ifelse(ngo_type == "Environmental NGO", "Environmental citizen group",
                                ifelse(gen_type != "Citizen groups", NA, "Other citizen group")),
                         levels = c("Other citizen group", "Environmental citizen group")),
         # ig_lab: development, human rights and social welfare NGOs vs. all
         # other citizen groups; NA for rows that are not citizen groups.
         ig_lab = factor(ifelse(ngo_type %in% c("Development NGO", "Human rights NGO", "Social welfare NGO"), "Labour policy citizen group",
                                ifelse(gen_type != "Citizen groups", NA, "Other citizen group")),
                         levels = c("Other citizen group", "Labour policy citizen group")),
         # know_ns: business groups cross-classified by knowledge_intensive
         # (1 / 0) and non_EU_OECD_focus ("No" / "Yes"); NA otherwise.
         # NOTE(review): "Nothern" is misspelled in these labels (ig_export
         # uses "Northern"). Left unchanged because the labels are runtime
         # values other code may match on -- confirm before correcting.
         know_ns = factor(ifelse(gen_type == "Business groups" & knowledge_intensive == 1 & non_EU_OECD_focus == "No", "Nothern knowledge intensive business",
                                   ifelse(gen_type == "Business groups" & knowledge_intensive == 1 & non_EU_OECD_focus == "Yes", "Southern knowledge intensive business",
                                          ifelse(gen_type == "Business groups" & knowledge_intensive == 0 & non_EU_OECD_focus == "No", "Nothern other business",
                                                 ifelse(gen_type == "Business groups" & knowledge_intensive == 0 & non_EU_OECD_focus == "Yes", "Southern other business", NA)))),
                            levels = c("Nothern other business", "Southern other business", "Nothern knowledge intensive business", "Southern knowledge intensive business")))
